Libraries
library(survival)
library(FRESA.CAD)
## Loading required package: Rcpp
## Loading required package: stringr
## Loading required package: miscTools
## Loading required package: Hmisc
##
## Attaching package: 'Hmisc'
## The following objects are masked from 'package:base':
##
## format.pval, units
## Loading required package: pROC
## Type 'citation("pROC")' for a citation.
##
## Attaching package: 'pROC'
## The following objects are masked from 'package:stats':
##
## cov, smooth, var
op <- par(no.readonly = TRUE)
pander::panderOptions('digits', 3)
pander::panderOptions('table.split.table', 400)
pander::panderOptions('keep.trailing.zeros',TRUE)
source("C:/Users/jtame/Documents/GitHub/RISKPLOTS/CODE/auxplots.R")
The data set
data(cancer)
colon <- subset(colon,etype==1)
colon$etype <- NULL
rownames(colon) <- colon$id
colon$id <- NULL
colon <- colon[complete.cases(colon),]
time <- colon$time
status <- colon$status
data <- colon
data$time <- NULL
data$study <- NULL
table(data$status)
0 1 442 446
dataColon <- as.data.frame(model.matrix(status~.*age,data))
dataColon$`(Intercept)` <- NULL
dataColon$time <- time/365
dataColon$status <- status
colnames(dataColon) <-str_replace_all(colnames(dataColon),":","_")
colnames(dataColon) <-str_replace_all(colnames(dataColon),"\\.","_")
colnames(dataColon) <-str_replace_all(colnames(dataColon),"\\+","_")
data <- NULL
trainsamples <- sample(nrow(dataColon),0.7*nrow(dataColon))
dataColonTrain <- dataColon[trainsamples,]
dataColonTest <- dataColon[-trainsamples,]
pander::pander(table(dataColonTrain$status))
pander::pander(table(dataColonTest$status))
Cox Model Performance
Here we evaluate the model using the RRPlot() function.
The evaluation of the raw Cox model with RRPlot()
Here we will use the predicted event probability assuming a baseline
hazard for events withing 5 years
index <- predict(ml,dataColonTrain)
timeinterval <- round(2*mean(subset(dataColonTrain,status==1)$time),0)
timeinterval <- 2
h0 <- sum(dataColonTrain$status & dataColonTrain$time <= timeinterval)
h0 <- h0/sum((dataColonTrain$time > timeinterval) | (dataColonTrain$status==1))
rdata <- cbind(dataColonTrain$status,ppoisGzero(index,h0))
rrAnalysisTrain <- RRPlot(rdata,atRate=c(0.90,0.80),
timetoEvent=dataColonTrain$time,
title="Raw Train: Colon Cancer",
ysurvlim=c(0.00,1.0),
riskTimeInterval=timeinterval)






By Risk Categories
obsexp <- rrAnalysisTrain$OERatio$atThrEstimates
expObs(obsexp,"Train: Expected vs. Observed")

pander::pander(obsexp)
| Total |
310 |
276.4 |
346.5 |
455.2 |
0.681 |
0.607 |
0.761 |
6.78e-13 |
| low |
172 |
147.3 |
199.7 |
289.7 |
0.594 |
0.508 |
0.689 |
1.13e-13 |
| 90% |
52 |
38.8 |
68.2 |
55.1 |
0.944 |
0.705 |
1.238 |
7.36e-01 |
| 80% |
86 |
68.8 |
106.2 |
97.7 |
0.880 |
0.704 |
1.087 |
2.65e-01 |
Time to Event Analysis
rrAnalysisdata <- rrAnalysisTrain
pander::pander(wilcox.test(rrAnalysisdata$timetoEventData$eTime,rrAnalysisdata$timetoEventData$expectedTime,paired = TRUE))
Wilcoxon signed rank test with continuity correction:
rrAnalysisdata$timetoEventData$eTime and
rrAnalysisdata$timetoEventData$expectedTime
| 140890 |
3.77e-23 * * * |
two.sided |
highrisk <- rrAnalysisdata$timetoEventData$class == 2
pander::pander(wilcox.test(rrAnalysisdata$timetoEventData$eTime[highrisk],rrAnalysisdata$timetoEventData$expectedTime[highrisk],paired = TRUE))
Wilcoxon signed rank test with continuity correction:
rrAnalysisdata$timetoEventData$eTime[highrisk] and
rrAnalysisdata$timetoEventData$expectedTime[highrisk]
| 3991 |
0.197 |
two.sided |
timesdata <- expObsTime(rrAnalysisdata,title="Train: Expected vs Observed")

pander::pander(timesdata)
| 1Q |
1.65 |
0.486 |
0.542 |
2.84 |
2.04 |
1.27 |
| Median |
5.61 |
1.356 |
1.145 |
2.98 |
2.38 |
1.47 |
| 3Q |
6.61 |
6.001 |
4.721 |
3.65 |
2.50 |
1.65 |
Cox Calibration
op <- par(no.readonly = TRUE)
calprob <- CoxRiskCalibration(ml,dataColonTrain,"status","time")
( 3.150615 , 3115.997 , 1.028938 , 310 , 519.5296 )
pander::pander(c(h0=calprob$h0,
Gain=calprob$hazardGain,
TimeInterval=calprob$timeInterval),
caption="Cox Calibration Parameters")
By Risk Categories
obsexp <- rrAnalysisCalTrain$OERatio$atThrEstimates
expObs(obsexp,"Cal: Expected vs. Observed")

pander::pander(obsexp)
| Total |
310 |
276.4 |
346.5 |
525.0 |
0.590 |
0.527 |
0.660 |
3.98e-24 |
| low |
172 |
147.3 |
199.7 |
334.1 |
0.515 |
0.441 |
0.598 |
1.85e-22 |
| 90% |
52 |
38.8 |
68.2 |
63.6 |
0.818 |
0.611 |
1.073 |
1.67e-01 |
| 80% |
86 |
68.8 |
106.2 |
112.7 |
0.763 |
0.610 |
0.942 |
1.09e-02 |
Time to Event Analysis
rrAnalysisdata <- rrAnalysisCalTrain
pander::pander(wilcox.test(rrAnalysisdata$timetoEventData$eTime,rrAnalysisdata$timetoEventData$expectedTime,paired = TRUE))
Wilcoxon signed rank test with continuity correction:
rrAnalysisdata$timetoEventData$eTime and
rrAnalysisdata$timetoEventData$expectedTime
| 149550 |
2.26e-32 * * * |
two.sided |
highrisk <- rrAnalysisdata$timetoEventData$class == 2
pander::pander(wilcox.test(rrAnalysisdata$timetoEventData$eTime[highrisk],rrAnalysisdata$timetoEventData$expectedTime[highrisk],paired = TRUE))
Wilcoxon signed rank test with continuity correction:
rrAnalysisdata$timetoEventData$eTime[highrisk] and
rrAnalysisdata$timetoEventData$expectedTime[highrisk]
| 4277 |
0.0397 * |
two.sided |
timesdata <- expObsTime(rrAnalysisdata,title="Cal: Expected vs Observed")

pander::pander(timesdata)
| 1Q |
1.65 |
0.486 |
0.542 |
2.46 |
1.77 |
1.10 |
| Median |
5.61 |
1.356 |
1.145 |
2.58 |
2.06 |
1.28 |
| 3Q |
6.61 |
6.001 |
4.721 |
3.17 |
2.17 |
1.43 |
By Risk Categories
obsexp <- rrAnalysisTest$OERatio$atThrEstimates
expObs(obsexp,"Test: Expected vs. Observed")

pander::pander(obsexp)
| Total |
136 |
114.1 |
160.9 |
214.3 |
0.635 |
0.532 |
0.751 |
1.33e-08 |
| low |
72 |
56.3 |
90.7 |
134.0 |
0.537 |
0.420 |
0.677 |
6.25e-09 |
| 90% |
21 |
13.0 |
32.1 |
30.1 |
0.698 |
0.432 |
1.067 |
1.01e-01 |
| 80% |
43 |
31.1 |
57.9 |
58.6 |
0.733 |
0.531 |
0.988 |
4.25e-02 |
Time to Event Analysis
rrAnalysisdata <- rrAnalysisTest
pander::pander(wilcox.test(rrAnalysisdata$timetoEventData$eTime,rrAnalysisdata$timetoEventData$expectedTime,paired = TRUE))
Wilcoxon signed rank test with continuity correction:
rrAnalysisdata$timetoEventData$eTime and
rrAnalysisdata$timetoEventData$expectedTime
| 27657 |
1.04e-14 * * * |
two.sided |
highrisk <- rrAnalysisdata$timetoEventData$class == 2
pander::pander(wilcox.test(rrAnalysisdata$timetoEventData$eTime[highrisk],rrAnalysisdata$timetoEventData$expectedTime[highrisk],paired = TRUE))
Wilcoxon signed rank test with continuity correction:
rrAnalysisdata$timetoEventData$eTime[highrisk] and
rrAnalysisdata$timetoEventData$expectedTime[highrisk]
| 1136 |
0.0587 |
two.sided |
timesdata <- expObsTime(rrAnalysisdata,title="Test: Expected vs Observed")

pander::pander(timesdata)
| 1Q |
1.49 |
1.12 |
0.63 |
2.46 |
1.84 |
1.17 |
| Median |
5.50 |
1.86 |
1.20 |
2.67 |
1.99 |
1.34 |
| 3Q |
6.35 |
5.73 |
4.16 |
3.14 |
2.16 |
1.43 |
Cross-Validation
Here we will cross validate the training set and evaluate also on the
testing set. The h0 and the timeinterval are the ones estimated on the
calibration process
rcv <- randomCV(theData=dataColonTrain,
theOutcome = Surv(time,status)~1,
fittingFunction=BSWiMS.model,
trainFraction = 0.75,
repetitions=50,
classSamplingType = "Pro",
testingSet=dataColonTest
)
.[++++].[++++].[+++].[++++].[+++-].[+++].[++++].[+++].[++++].[+++]10
Tested: 849 Avg. Selected: 8.4 Min Tests: 1 Max Tests: 10 Mean Tests:
4.982332 . MAD: 0.4704607
.[+++-].[++++-].[+++++].[++++++].[+++].[+++-].[+++-].[++++++].[+++].[++++-]20
Tested: 885 Avg. Selected: 8.15 Min Tests: 1 Max Tests: 20 Mean Tests:
9.559322 . MAD: 0.4693351
.[++++].[+++-].[+++-].[++++].[++++].[+++-].[++++++].[+++++].[+++++].[++++]30
Tested: 888 Avg. Selected: 8.333333 Min Tests: 1 Max Tests: 30 Mean
Tests: 14.29054 . MAD: 0.4695776
.[++++–].[++++-].[++–].[++++].[++++].[+++++].[+++++].[+++++].[++++++].[++++]40
Tested: 888 Avg. Selected: 8.425 Min Tests: 1 Max Tests: 40 Mean Tests:
19.05405 . MAD: 0.4702961
.[++++-].[++++].[++++].[++++].[++++-].[+++].[+++++].[+++].[++++].[+++-]50
Tested: 888 Avg. Selected: 8.34 Min Tests: 1 Max Tests: 50 Mean Tests:
23.81757 . MAD: 0.4705424
stp <- rcv$survTestPredictions
stp <- stp[!is.na(stp[,4]),]
bbx <- boxplot(unlist(stp[,1])~rownames(stp),plot=FALSE)
times <- bbx$stats[3,]
status <- boxplot(unlist(stp[,2])~rownames(stp),plot=FALSE)$stats[3,]
prob <- ppoisGzero(boxplot(unlist(stp[,4])~rownames(stp),plot=FALSE)$stats[3,],calprob$h0)
rdatacv <- cbind(status,prob)
rownames(rdatacv) <- bbx$names
names(times) <- bbx$names
rrAnalysisCVTest <- RRPlot(rdatacv,atThr = rrAnalysisCalTrain$thr_atP,
timetoEvent=times,
title="CV Test: Colon Cancer",
ysurvlim=c(0.00,1.0),
riskTimeInterval=calprob$timeInterval)






By Risk Categories
obsexp <- rrAnalysisCVTest$OERatio$atThrEstimates
expObs(obsexp,"CV: Expected vs. Observed")

pander::pander(obsexp)
| Total |
446 |
405.6 |
489 |
774 |
0.576 |
0.524 |
0.632 |
2.07e-37 |
| low |
219 |
191.0 |
250 |
436 |
0.502 |
0.438 |
0.573 |
1.83e-30 |
| 90% |
90 |
72.4 |
111 |
126 |
0.714 |
0.574 |
0.878 |
9.60e-04 |
| 80% |
137 |
115.0 |
162 |
198 |
0.692 |
0.581 |
0.818 |
6.14e-06 |
Time to Event Analysis
rrAnalysisdata <- rrAnalysisCVTest
pander::pander(wilcox.test(rrAnalysisdata$timetoEventData$eTime,rrAnalysisdata$timetoEventData$expectedTime,paired = TRUE))
Wilcoxon signed rank test with continuity correction:
rrAnalysisdata$timetoEventData$eTime and
rrAnalysisdata$timetoEventData$expectedTime
| 306656 |
2.32e-46 * * * |
two.sided |
highrisk <- rrAnalysisdata$timetoEventData$class == 2
pander::pander(wilcox.test(rrAnalysisdata$timetoEventData$eTime[highrisk],rrAnalysisdata$timetoEventData$expectedTime[highrisk],paired = TRUE))
Wilcoxon signed rank test with continuity correction:
rrAnalysisdata$timetoEventData$eTime[highrisk] and
rrAnalysisdata$timetoEventData$expectedTime[highrisk]
| 11694 |
0.000962 * * * |
two.sided |
timesdata <- expObsTime(rrAnalysisdata,title="CV: Expected vs Observed")

pander::pander(timesdata)
| 1Q |
1.70 |
0.764 |
0.551 |
2.40 |
1.83 |
1.08 |
| Median |
5.58 |
2.144 |
1.153 |
2.67 |
2.09 |
1.24 |
| 3Q |
6.60 |
5.999 |
4.989 |
3.21 |
2.17 |
1.38 |